library(readxl)
library(dplyr)
library(ggplot2)
library(scales)
library(ade4)
library(plotrix)
library(stargazer)
# *** CHANGE GENE NAME ***
gene_name <- "EPHA7"
microarray <- read_excel("/Users/schilder/Desktop/Dissertation/Gene_Expression/ABA_Microarray_data.xlsx", na = "NA", sheet = paste(gene_name))
str(microarray)
## Classes 'tbl_df', 'tbl' and 'data.frame': 2892 obs. of 17 variables:
## $ Species : chr "Human" "Human" "Human" "Human" ...
## $ donor_id : num 9861 9861 9861 9861 9861 ...
## $ donor_name : chr "H0351.2001" "H0351.2001" "H0351.2001" "H0351.2001" ...
## $ donor_age : chr "24 years" "24 years" "24 years" "24 years" ...
## $ donor_color : chr "EC891D" "EC891D" "EC891D" "EC891D" ...
## $ structure_id : num 4053 4078 4888 4047 4041 ...
## $ structure_name : chr "anterior orbital gyrus" "frontal operculum" "frontal pole" "gyrus rectus" ...
## $ structure_abbreviation : chr "AOrG" "fro" "FP" "GRe" ...
## $ structure_color : chr "E8BF59" "E8C159" "E8C359" "E8C459" ...
## $ top_level_structure_id : num 4009 4009 4009 4009 4009 ...
## $ top_level_structure_name : chr "frontal lobe" "frontal lobe" "frontal lobe" "frontal lobe" ...
## $ top_level_structure_abbreviation: chr "FL" "FL" "FL" "FL" ...
## $ top_level_structure_color : chr "E8CD59" "E8CD59" "E8CD59" "E8CD59" ...
## $ 1057236 : num 0.228 -0.213 -0.199 -0.24 -0.378 ...
## $ 1057237 : num -0.494 0.22 -0.102 -0.281 0.417 ...
## $ 1065061 : num -0.684 -0.577 -0.624 -0.743 -0.857 ...
## $ Expression : num -0.316 -0.19 -0.308 -0.421 -0.273 ...
RNAseq <- read_excel("/Users/schilder/Desktop/Dissertation/Gene_Expression/ABA_RNAseq_data.xlsx", na = "NA", sheet = paste(gene_name))
str(RNAseq)
## Classes 'tbl_df', 'tbl' and 'data.frame': 524 obs. of 13 variables:
## $ donor_id : num 13058 13058 13058 13058 13058 ...
## $ donor_name : chr "H376.IIA.51" "H376.IIA.51" "H376.IIA.51" "H376.IIA.51" ...
## $ donor_age : chr "8 pcw" "8 pcw" "8 pcw" "8 pcw" ...
## $ donor_color : chr "0000FF" "0000FF" "0000FF" "0000FF" ...
## $ structure_id : num 10173 10185 10278 10194 10291 ...
## $ structure_name : chr "dorsolateral prefrontal cortex" "ventrolateral prefrontal cortex" "anterior (rostral) cingulate (medial prefrontal) cortex" "orbital frontal cortex" ...
## $ structure_abbreviation : chr "DFC" "VFC" "MFC" "OFC" ...
## $ structure_color : chr "D4B235" "C2A335" "E26880" "E2D4A7" ...
## $ top_level_structure_id : num 10153 10153 10153 10153 10153 ...
## $ top_level_structure_name : chr "neural plate" "neural plate" "neural plate" "neural plate" ...
## $ top_level_structure_abbreviation: chr "NP" "NP" "NP" "NP" ...
## $ top_level_structure_color : chr "D7D8D8" "D7D8D8" "D7D8D8" "D7D8D8" ...
## $ Expression : num 5.09 5.43 4.73 3.61 5.42 ...
macaque_adult <- filter(microarray, Species == "Macaque", donor_age == "48 mo") %>% droplevels()
# Top structures: Interindividual variation
ggplot(data = macaque_adult) + geom_boxplot(aes(x=donor_name, y=Expression, fill=top_level_structure_name)) + labs(title=paste("Macaque",gene_name,"Expression:\nInterindividual Variation"), x="Donor ID", y="Expression Level")
# Top structures: Average
ggplot(data = macaque_adult) + geom_boxplot(aes(x=top_level_structure_name, y=Expression, fill=top_level_structure_name)) +
labs(title=paste("Adult Macaque",gene_name,"Expression: \nAverage"), x="Structure Name", y="Expression Level") + scale_x_discrete(labels=c("Amygdaloid Complex", "Basal ganglia", "Total Brain", "Hippocampal formation")) +
theme(legend.position="none", axis.text.x = element_text(color="black"))
# Hippocampal substructures: Interindividual Variaton
ggplot(data = filter(macaque_adult, top_level_structure_name == "hippocampal cortex (hippocampal formation)")) +
geom_bar(aes(x=donor_name, y=Expression, fill=structure_name), stat="identity", position="dodge") +
labs(title=paste("Adult Macaque",gene_name,"Expression: Hippocampal Substructures: \nInterindividual Variation"), x="Substructure", y="Expression Level")
# Hippocampal substructures: Average
ggplot(data = filter(macaque_adult, top_level_structure_name == "hippocampal cortex (hippocampal formation)")) + geom_boxplot(aes(x=structure_name, y=Expression, fill=structure_name)) +
labs(title=paste("Adult Macaque",gene_name,"Expression: Hippocampal Substructures: \nAverage"), x="Substructure", y="Expression Level") +
theme(legend.position="none", legend.key.size=grid::unit(.4, "cm"), legend.text=element_text(size=7), axis.text.x = element_text(angle=45, color="black", size=10, hjust=1))
# Macaque: For postnatal brains, RIGHT hems were used for microarray and left hems were used for ISH.
# Human: Both left and right included separately for humans.
# Duplicate structure_names
microarray$structure_name2 <- microarray$structure_name
# Change names of macaque subregions to be more general
# ** Assumes left and right hemisphere are not significantly different
microarray$structure_name2 <- gsub("granular layer of dentate gyrus (cortex)", "DG", microarray$structure_name2, fixed=TRUE)
microarray$structure_name2 <- gsub("subgranular zone of dentate gyrus (cortex)", "DG", microarray$structure_name2, fixed=TRUE)
microarray$structure_name2 <- gsub("CA4 region", "CA4", microarray$structure_name2)
microarray$structure_name2 <- gsub("polyform layer of dentate gyrus (cortex)", "DG", microarray$structure_name2, fixed=TRUE)
microarray$structure_name2 <- gsub("stratum pyramidale of CA3", "CA3", microarray$structure_name2)
microarray$structure_name2 <- gsub("stratum pyramidale of CA2", "CA2", microarray$structure_name2)
microarray$structure_name2 <- gsub("stratum pyramidale of CA1", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("stratum oriens of CA1", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("stratum radiatum of CA1", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("subiculum", "Sub", microarray$structure_name2)
# Repeat for human subregions
microarray$structure_name2 <- gsub("CA1 field, left", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA1 field, right", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA1 field", "CA1", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA2 field, left", "CA2", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA2 field, right", "CA2", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA2 field", "CA2", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA3 field, left", "CA3", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA3 field, right", "CA3", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA3 field", "CA3", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA4 field, left", "CA4", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA4 field, right", "CA4", microarray$structure_name2)
microarray$structure_name2 <- gsub("CA4 field", "CA4", microarray$structure_name2)
microarray$structure_name2 <- gsub("dentate gyrus, left", "DG", microarray$structure_name2)
microarray$structure_name2 <- gsub("dentate gyrus, right", "DG", microarray$structure_name2)
microarray$structure_name2 <- gsub("dentate gyrus", "DG", microarray$structure_name2)
microarray$structure_name2 <- gsub("Sub, left", "Sub", microarray$structure_name2)
microarray$structure_name2 <- gsub("Sub, right", "Sub", microarray$structure_name2)
microarray$structure_name2 <- gsub("subiculum", "Sub", microarray$structure_name2)
# rescale macaques and humans to make comparable (**Double check that I'm doing this right)
mac_scaled <- filter(microarray, Species == "Macaque", donor_age=="48 mo")
mac_scaled$Expression <- rescale(mac_scaled$Expression, c(-1,1))
human_scaled <- filter(microarray, Species=="Human")
human_scaled$Expression <- rescale(human_scaled$Expression, c(-1,1))
combo <- rbind(mac_scaled, human_scaled)
adult_HP <- filter(combo, top_level_structure_name == "hippocampal formation"|top_level_structure_name == "hippocampal cortex (hippocampal formation)", structure_name2 != "NA") %>% droplevels()
unique(adult_HP$structure_name2)
## [1] "DG" "CA1" "CA2" "CA3" "CA4" "Sub"
Separate adult macaques, then rescale them by themselves. This is because the original macaque data has developmental data, while the original human data is only adults. This difference could cause a bias to make macaques seem like they have lower expression, since many of these genes are highly upregulated during development.
# Whole hippocampus
HP_model <- lm(data = filter(adult_HP), Expression ~ Species)
# DG model
DG_model <- lm(data = filter(adult_HP, structure_name2=="DG"), Expression ~ Species)
# CA4 model
CA4_model <- lm(data = filter(adult_HP, structure_name2=="CA4"), Expression ~ Species)
# CA3 model
CA3_model <- lm(data = filter(adult_HP, structure_name2=="CA3"), Expression ~ Species)
# CA2 model
CA2_model <- lm(data = filter(adult_HP, structure_name2=="CA2"), Expression ~ Species)
# CA1 model
CA1_model <- lm(data = filter(adult_HP, structure_name2=="CA1"), Expression ~ Species)
# Sub model
Sub_model <- lm(data = filter(adult_HP, structure_name2=="Sub"), Expression ~ Species)
stargazer(HP_model, DG_model, CA4_model, CA3_model, CA2_model, CA1_model, Sub_model,
type="html", dep.var.labels = "", dep.var.caption = paste(gene_name, "Expression"), model.numbers = FALSE, column.labels = c("Whole HP", "DG", "CA4", "CA3", "CA2", "CA1", "Sub"), covariate.labels = c("Species", "Intercept"), star.cutoffs=c(0.05, 0.01, 0.001), align = TRUE, notes.align = "l", no.space = TRUE, title="Hippocampal Subfield Expression: Interspecies Comparisons", report="vcsp*", df = FALSE)
| EPHA7 Expression | |||||||
| Whole HP | DG | CA4 | CA3 | CA2 | CA1 | Sub | |
| Species | 0.136 | -0.236 | 0.180 | 0.309 | 0.046 | 0.055 | 0.169 |
| (0.076) | (0.124) | (0.030) | (0.054) | (0.053) | (0.121) | (0.064) | |
| p = 0.081 | p = 0.079 | p = 0.001*** | p = 0.001*** | p = 0.423 | p = 0.661 | p = 0.033* | |
| Intercept | 0.234 | 0.864 | 0.142 | 0.033 | -0.018 | 0.171 | 0.212 |
| (0.050) | (0.096) | (0.015) | (0.031) | (0.031) | (0.092) | (0.037) | |
| p = 0.00002*** | p = 0.00000*** | p = 0.0001*** | p = 0.326 | p = 0.583 | p = 0.086 | p = 0.001*** | |
| Observations | 64 | 15 | 8 | 9 | 9 | 14 | 9 |
| R2 | 0.049 | 0.219 | 0.860 | 0.824 | 0.094 | 0.017 | 0.503 |
| Adjusted R2 | 0.033 | 0.159 | 0.837 | 0.799 | -0.036 | -0.065 | 0.433 |
| Residual Std. Error | 0.302 | 0.235 | 0.036 | 0.076 | 0.076 | 0.224 | 0.090 |
| F Statistic | 3.167 | 3.654 | 36.944*** | 32.712*** | 0.725 | 0.203 | 7.097* |
| Note: | p<0.05; p<0.01; p<0.001 | ||||||
library(tidyr)
grouped <- adult_HP %>% spread(key=structure_name2, value=Expression) %>%
group_by(donor_name) %>%
dplyr::summarise(DG = mean(DG, na.rm=TRUE),
#CA4 = mean(CA4, na.rm=TRUE),
CA3 = mean(CA3, na.rm=TRUE),
CA2 = mean(CA2, na.rm=TRUE),
CA1 = mean(CA1, na.rm=TRUE),
Sub = mean(Sub, na.rm=TRUE))
grouped$Species <- grouped$donor_name
grouped$Species <- gsub(pattern = "H0351.1009", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.1012", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.1015", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.1016", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.2001", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "H0351.2002", replacement = "Human", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "MMU36322", replacement = "Macaque", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "MMU36358", replacement = "Macaque", grouped$Species, fixed=TRUE)
grouped$Species <- gsub(pattern = "MMU36468", replacement = "Macaque", grouped$Species, fixed=TRUE)
grouped
## # A tibble: 9 Ă— 7
## donor_name DG CA3 CA2 CA1 Sub
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 H0351.1009 0.7676549 -0.04193689 0.03322052 0.11289682 0.3282819
## 2 H0351.1012 0.8250109 0.03132977 -0.02301046 0.34648982 0.2145720
## 3 H0351.1015 0.7211897 0.01957874 -0.17211518 0.03773942 0.1683526
## 4 H0351.1016 0.9328972 0.05146628 0.06154399 0.22528314 0.2916769
## 5 H0351.2001 1.0000000 -0.01401047 -0.01425627 0.12390100 0.1383747
## 6 H0351.2002 0.9359224 0.15095767 0.00778990 0.18173911 0.1293747
## 7 MMU36322 0.6128473 0.37589844 -0.03767784 0.26293415 0.5037124
## 8 MMU36358 0.6489927 0.41550417 0.05003609 0.19845374 0.3223922
## 9 MMU36468 0.6202029 0.23281956 0.07087704 0.22860128 0.3172907
## # ... with 1 more variables: Species <chr>
Can choose whether or not to include CA4. Could also just combine it with DG.
anova(lm(grouped, formula = DG + CA3 + CA2 + CA1 + Sub ~ Species, na.action=na.omit))
## Analysis of Variance Table
##
## Response: DG + CA3 + CA2 + CA1 + Sub
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 1 0.23894 0.23894 4.154 0.08094 .
## Residuals 7 0.40264 0.05752
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Macaques
mac <- filter(grouped, Species=="Macaque") %>% dplyr::select(DG:Sub) %>% na.omit()
mac_cor <- cor(mac, dplyr::select(mac, DG:Sub)) # Correlation matrix
stargazer(mac_cor, type="html")
| DG | CA3 | CA2 | CA1 | Sub | |
| DG | 1 | 0.512 | 0.510 | -0.933 | -0.639 |
| CA3 | 0.512 | 1 | -0.478 | -0.169 | 0.334 |
| CA2 | 0.510 | -0.478 | 1 | -0.785 | -0.988 |
| CA1 | -0.933 | -0.169 | -0.785 | 1 | 0.873 |
| Sub | -0.639 | 0.334 | -0.988 | 0.873 | 1 |
# Humans
hum <- filter(grouped, Species=="Human") %>% dplyr::select(DG:Sub) %>% na.omit()
hum_cor <- cor(hum) # Correlation matrix
stargazer(hum_cor, type="html")
| DG | CA3 | CA2 | CA1 | Sub | |
| DG | 1 | 0.345 | 0.563 | 0.272 | -0.345 |
| CA3 | 0.345 | 1 | 0.094 | 0.290 | -0.470 |
| CA2 | 0.563 | 0.094 | 1 | 0.483 | 0.492 |
| CA1 | 0.272 | 0.290 | 0.483 | 1 | 0.145 |
| Sub | -0.345 | -0.470 | 0.492 | 0.145 | 1 |
# All species
all <- dplyr::select(grouped, DG:Sub) %>% na.omit()
all_cor <- cor(all) # Correlation matrix
stargazer(all_cor, type="html")
| DG | CA3 | CA2 | CA1 | Sub | |
| DG | 1 | -0.659 | 0.054 | -0.122 | -0.701 |
| CA3 | -0.659 | 1 | 0.251 | 0.368 | 0.606 |
| CA2 | 0.054 | 0.251 | 1 | 0.435 | 0.281 |
| CA1 | -0.122 | 0.368 | 0.435 | 1 | 0.372 |
| Sub | -0.701 | 0.606 | 0.281 | 0.372 | 1 |
mantel.rtest(dist(mac_cor), dist(hum_cor)) # Tests whether 2 covariance matrices differ
## Monte-Carlo test
## Observation: -0.08514423
## Call: mantel.rtest(m1 = dist(mac_cor), m2 = dist(hum_cor))
## Based on 99 replicates
## Simulated p-value: 0.65
gdata <- gather(grouped, key = "Subfield", value = "Expression", DG:Sub )
summary(aov(data=gdata, Expression ~ Species)) # Does expression differ across species?
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 1 0.048 0.04779 0.533 0.469
## Residuals 43 3.853 0.08960
summary(aov(data=gdata, Expression ~ Subfield)) # Does expression differe across subfields?
## Df Sum Sq Mean Sq F value Pr(>F)
## Subfield 4 3.273 0.8182 52.15 2.4e-15 ***
## Residuals 40 0.628 0.0157
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(aov(data=adult_HP, Expression ~ structure_name2)) # Do subfields differ from one another?
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Expression ~ structure_name2, data = adult_HP)
##
## $structure_name2
## diff lwr upr p adj
## CA2-CA1 -0.20512726 -0.43753190 0.02727738 0.1133456
## CA3-CA1 -0.06677176 -0.29917641 0.16563288 0.9572576
## CA4-CA1 -0.01541085 -0.25649490 0.22567321 0.9999654
## DG-CA1 0.51941436 0.31727285 0.72155587 0.0000000
## Sub-CA1 0.06571944 -0.16668520 0.29812408 0.9600192
## CA3-CA2 0.13835550 -0.11806923 0.39478022 0.6082369
## CA4-CA2 0.18971641 -0.07460014 0.45403297 0.2940266
## DG-CA2 0.72454162 0.49518837 0.95389487 0.0000000
## Sub-CA2 0.27084670 0.01442197 0.52727143 0.0325933
## CA4-CA3 0.05136092 -0.21295564 0.31567747 0.9924390
## DG-CA3 0.58618612 0.35683288 0.81553937 0.0000000
## Sub-CA3 0.13249120 -0.12393352 0.38891593 0.6512899
## DG-CA4 0.53482521 0.29668131 0.77296910 0.0000002
## Sub-CA4 0.08113029 -0.18318627 0.34544685 0.9437976
## Sub-DG -0.45369492 -0.68304817 -0.22434167 0.0000038
summary(aov(data=gdata, Expression ~ Error(Subfield/Species))) # Repeated-measures ANOVA
##
## Error: Subfield
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 4 3.273 0.8182
##
## Error: Subfield:Species
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 5 0.3706 0.07411
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 35 0.257 0.007343
Wouldn’t necessarily trust the correlation matrices or Mantel test, since the there’s so few individuals per species. This is especially true of macaques, which only have 2-3 individuals.
PCA <- prcomp(select(grouped, DG:Sub) %>% na.omit(), scale=TRUE, center=TRUE)
pander::pander(summary(PCA))
| Â | PC1 | PC2 | PC3 | PC4 | PC5 |
|---|---|---|---|---|---|
| DG | 0.4779 | -0.4905 | 0.0397 | 0.08903 | 0.7222 |
| CA3 | -0.5296 | 0.1214 | -0.02066 | 0.7676 | 0.3394 |
| CA2 | -0.2616 | -0.677 | -0.6388 | 0.02188 | -0.2543 |
| CA1 | -0.3578 | -0.5215 | 0.7561 | -0.07795 | -0.1494 |
| Sub | -0.5429 | 0.1198 | -0.1354 | -0.6295 | 0.5256 |
| Â | PC1 | PC2 | PC3 | PC4 | PC5 |
|---|---|---|---|---|---|
| Standard deviation | 1.609 | 1.131 | 0.7413 | 0.6295 | 0.4315 |
| Proportion of Variance | 0.5177 | 0.2559 | 0.1099 | 0.07926 | 0.03724 |
| Cumulative Proportion | 0.5177 | 0.7736 | 0.8835 | 0.9628 | 1 |
summPCA <- summary(PCA)
# Prep PCA data
grouped_omit <- na.omit(all)
Species <- grouped %>% na.omit() %>% select(Species)
levels(Species) <- c("Human", "Macaque")
DFforPlot <- data.frame(PC1=PCA$x[,1], PC2=PCA$x[,2], Species=Species)
Human <- subset(DFforPlot, Species=="Human")
Macaque <- subset(DFforPlot, Species=="Macaque")
# Plot PCA
ggplot(data=DFforPlot, aes(x=PC1, y=PC2, color=Species, fill=Species)) +
theme_bw(30) +
geom_polygon(aes(x=PC1, y=PC2), alpha=.3, data=Macaque[chull(Macaque$PC1, Macaque$PC2),]) +
geom_polygon(aes(x=PC1, y=PC2), alpha=.3, data=Human[chull(Human$PC1, Human$PC2),]) +
geom_point(size=5) + labs(title = paste(gene_name), x = paste("PC1","(", round(summPCA$importance[2,1]*100, 2), "%)"), y = paste("PC2","(", round(summPCA$importance[2,2]*100, 2), "%)"))
Can’t do interspecies analysis across all brain regions, because 1) not all subdivisions are present in both, 3) different names are often used, 3) humans have bilateral structures.
Can’t do human analysis because NO structure overlaps for all humans.
RNAadult <- filter(RNAseq, donor_age == "18 yrs"|donor_age == "19 yrs"|donor_age == "23 yrs"|donor_age == "30 yrs"|donor_age == "36 yrs"|donor_age == "37 yrs"|donor_age == "40 yrs") %>% droplevels()
# Top Structures: Average
ggplot(data = RNAadult, aes(x=top_level_structure_name, y=Expression, fill=top_level_structure_name)) +
stat_smooth(method="loess", aes(group=1), color="purple") +
geom_bar(stat = "identity", position="dodge") +
labs(title=paste("Adult Human",gene_name,"Expression: Top Structures"), x="Age", y="Expression Level")
# Structures: Average
ggplot(data = RNAadult, aes(x=structure_name, y=Expression, fill=structure_name)) +
geom_bar(stat = "identity", position="dodge") +
labs(title=paste("Adult Human",gene_name,"Expression: Structures"), x="Age", y="Expression Level")+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Structure: Interindividual variability
ggplot(data = RNAadult, aes(x=donor_age, y=Expression, fill=structure_name)) +
stat_smooth(method="loess", aes(group=1), color="purple") +
geom_bar(stat = "identity", position="dodge") +
labs(title=paste("Adult Human",gene_name,"Expression: Structures"), x="Age", y="Expression Level")
# Hippocampus only
ggplot(data = filter(RNAadult, structure_name == "hippocampus (hippocampal formation)"), aes(x=donor_age, y=Expression, fill=structure_name)) +
geom_bar(stat = "identity", position="dodge") +
labs(title=paste("Adult Human",gene_name,"Expression: Hippocampus"), x="Age", y="Expression Level")
# Macaque microarray vs. Human RNAseq: all regions
RNAseq_all <- RNAseq %>% mutate(Species = "Human", Expression = rescale(Expression, c(-1,1))) %>% dplyr::select(Species, donor_age, Expression, structure_name)
RNAseq_all$donor_age <- gsub(RNAseq_all$donor_age, pattern = "37 pcw", replacement = "0 mo")
micro_all <- microarray %>% filter(Species=="Macaque") %>%
mutate(Expression = rescale(Expression, c(-1,1))) %>% dplyr::select(Species, donor_age, Expression, structure_name)
micro_all$donor_age <- gsub(micro_all$donor_age, pattern = "48 mo", replacement = "2 yrs")
micro_all$donor_age <- gsub(micro_all$donor_age, pattern = "12 mo", replacement = "1 yrs")
RNA_micro_all <- rbind(RNAseq_all, micro_all)
ggplot(data = RNA_micro_all, aes(x=donor_age, y=rescale(Expression, c(-1, 1)), fill=structure_name)) +
stat_smooth(method="loess", aes(group=1), color="purple") + facet_wrap(~Species, nrow = 2) +
geom_bar(stat = "identity", position="dodge") +
labs(title=paste("Human",gene_name,"Expression Over Development: Structures"), x="Age", y="Expression Level") +
scale_x_discrete(limits=c("E40", "E50", "8 pcw", "9 pcw","E70", "E80", "12 pcw", "E90", "13 pcw", "16 pcw", "17 pcw", "E120", "19 pcw", "21 pcw", "24 pcw", "25 pcw", "26 pcw", "35 pcw", "0 mo", "3 mo", "4 mos", "10 mos", "1 yrs", "2 yrs", "3 yrs", "4 yrs", "8 yrs", "11 yrs", "13 yrs", "15 yrs", "18 yrs", "19 yrs", "23 yrs", "30 yrs", "36 yrs", "37 yrs", "40 yrs")) + theme(legend.position = "none", legend.key.size = unit(.1, "cm"), axis.text.x = element_text(angle = 45, hjust = 1))
# Macaque microarray vs. Human RNAseq: hippocampus
RNAseq_HP <- RNAseq %>% filter(structure_name == "hippocampus (hippocampal formation)") %>% mutate(Species = "Human", Expression = rescale(Expression, c(-1,1))) %>% dplyr::select(Species, donor_age, Expression)
RNAseq_HP$donor_age <- gsub(RNAseq_HP$donor_age, pattern = "37 pcw", replacement = "0 mo")
micro_HP <- microarray %>% filter(Species=="Macaque") %>%
mutate(Expression = rescale(Expression, c(-1,1))) %>% dplyr::select(Species, donor_age, Expression)
micro_HP$donor_age <- gsub(micro_HP$donor_age, pattern = "48 mo", replacement = "2 yrs")
micro_HP$donor_age <- gsub(micro_HP$donor_age, pattern = "12 mo", replacement = "1 yrs")
RNA_micro_HP <- rbind(RNAseq_HP, micro_HP)
gr_RNA_micro_HP <- RNA_micro_HP %>% group_by(Species, donor_age) %>% dplyr::summarise(Expression = mean(Expression, na.rm = TRUE))
ggplot(data = gr_RNA_micro_HP, aes(x=donor_age, y=rescale(Expression, c(-1, 1)), fill=Species)) +
stat_smooth(method="loess", aes(group=Species)) + facet_wrap(~Species, nrow = 2) +
geom_bar(stat = "identity", position="dodge") +
labs(title=paste("Human",gene_name,"Expression Over Development: Hippocampus"), x="Age", y="Expression Level") +
scale_x_discrete(limits=c("E40", "E50", "8 pcw", "9 pcw","E70", "E80", "12 pcw", "E90", "13 pcw", "16 pcw", "17 pcw", "E120", "19 pcw", "21 pcw", "24 pcw", "25 pcw", "26 pcw", "35 pcw", "0 mo", "3 mo", "4 mos", "10 mos", "1 yrs", "2 yrs", "3 yrs", "4 yrs", "8 yrs", "11 yrs", "13 yrs", "15 yrs", "18 yrs", "19 yrs", "23 yrs", "30 yrs", "36 yrs", "37 yrs", "40 yrs")) + theme(text = element_text(size=10), axis.text.x = element_text(angle=45, vjust=0.5), legend.position = "top")
RNAedit <- RNAadult %>% select(donor_name, donor_age, structure_name, structure_abbreviation, top_level_structure_name, Expression) %>% mutate(Method = "RNAseq")
microedit <- human %>% select(donor_name, donor_age, structure_name, structure_abbreviation, top_level_structure_name, Expression) %>% mutate(Method = "microarray")
RNA_micro <- rbind(RNAedit, microedit)
RNA_micro <- merge(RNAedit, microedit, by = "structure_abbreviation")
Can’t compare across structures bc of different anatomical levels and nomenclature. But COULD compare within a structure (e.g. hippocampus) across many genes. Bioinformatics?